Data Import

Import dataset

# Import Ziesel dataset
dat <- read.csv("Zeisel_preprocessed.csv", row.names = 1)
cell_type <- read.table("Zeisel_cell_info.txt", sep = "\t", header = 1)

# Get the labels for each cell
cluster_labels <- as.numeric(as.factor(cell_type$level1class))

Divide the dataset by cell type

cell_labels <- unique(as.factor(cell_type$level1class))
subset_data <- list()
subset_heatmap <- list()
load("meaningful_indices.RData")

sub_full <- dat[, indices]

for (i in 1:length(cell_labels)){
  label <- cell_labels[i]
  extracted <- dat[which(cell_type$level1class == label), indices]
  subset_data[[i]] <- extracted
  
  hclustered <- hclust(dist(t(extracted)))$order
  reordered <- extracted[, hclustered]
  subset_heatmap[[i]] <- reordered
}

col <- ncol(subset_data[[1]])
cell_num <- length(cell_labels)

Function to draw a heatmap

library(ggplot2)
library(scales)

# Store a heatmap
store_heatmap <- function(cor_mat, method, celltype, abs = F){
  if (abs){
    heatmap_title <- paste0("Abs_Heatmap of ", method,
                            " Correlation\n(", celltype, ")")  
  }
  else{
    heatmap_title <- paste0("Heatmap of ", method,
                            " Correlation\n(", celltype, ")")
  }
  
  heatmap_matrix <- reshape2::melt(cor_mat, na.rm = T)
  heatmap_matrix <- heatmap_matrix[!is.na(heatmap_matrix$value),]
  
  len <- 7
  break_vec <- round(quantile(cor_mat,
                              probs = seq(0, 1,length.out = len),
                              na.rm = T), 3)
  break_vec[1] <- min(cor_mat, na.rm = T) - 0.1
  break_vec[length(break_vec)] <- max(cor_mat, na.rm = T) + 0.1
  
  heatmap_matrix$value <- cut(heatmap_matrix$value, breaks = break_vec)
  
  ## ggplot to draw a heatmap with the viridis palette
  cor_heatmap <- ggplot(data = heatmap_matrix, 
                        aes(x = Var2, y = Var1, fill = value)) + 
    geom_tile() +
    scale_fill_manual(breaks = sort(unique(heatmap_matrix$value)), 
                      values = rev(viridis_pal(begin = 0, end = 1)(len-1))) +
    theme(axis.text.x = element_blank(),
          axis.text.y = element_blank(),
          axis.ticks.x = element_blank(),
          axis.ticks.y = element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          panel.background = element_rect(fill = "transparent")) +
    labs(title = paste0(heatmap_title, label[i], " Correlation")) 
  return (cor_heatmap)
  }

1. Pearson Correlation

pearson_median <- c()
pearson_array <- array(NA, dim = c(col, col, cell_num))

pearson_max <- c()
pearson_max_pair1 <- c()
pearson_max_pair2 <- c()

pearson_min <- c()
pearson_min_pair1 <- c()
pearson_min_pair2 <- c()

pearson_heatmap <- list()

for (i in 1:length(cell_labels)){
  subset_pearson <- cor(subset_data[[i]], method = "pearson")

  pearson_median <- c(pearson_median, median(subset_pearson))
  
  subset_pearson[upper.tri(subset_pearson, diag = T)] <- NA
  pearson_array[, , i] <- subset_pearson
  
  pearson_vec <- sort(abs(subset_pearson), decreasing = T)
  
  max_idx <- which(abs(subset_pearson) == pearson_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  pearson_max_pair1 <- c(pearson_max_pair1, idx1)
  pearson_max_pair2 <- c(pearson_max_pair2, idx2)
  
  pearson_max <- c(pearson_max, subset_pearson[idx1, idx2])
  
  min_idx <- which(abs(subset_pearson) == rev(pearson_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  pearson_min_pair1 <- c(pearson_min_pair1, idx1)
  pearson_min_pair2 <- c(pearson_min_pair2, idx2)
  
  pearson_min <- c(pearson_min, subset_pearson[idx1, idx2])
  
  subset_pearson_heatmap <- cor(subset_heatmap[[i]], method = "pearson")
  subset_pearson_heatmap[upper.tri(subset_pearson_heatmap, diag = T)] <- NA
  
  # ggplot to draw a heatmap with the viridis palette
  pearson_heatmap[[i]] <- store_heatmap(subset_pearson_heatmap, "Pearson",
                                        cell_labels[i], abs = F)
}

The pairs with highest pearson coefficient by cell type. (Close to perfect linear relationship)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- pearson_max_pair1[i]; idx2 <- pearson_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T, col = cell_labels[i],
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(pearson_max[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest pearson coefficient by cell type. (Close to imperfect linear dependence)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- pearson_min_pair1[i]; idx2 <- pearson_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T, col = cell_labels[i],
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(pearson_min[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

Pearson correlation of the gene pair which has the the biggest difference by cell type

diff_pearson <- apply(pearson_array, MARGIN = c(1,2),
                      FUN = function(x) {diff(range(x))})

for (i in 1:4){
  diff_pearson_vec <- sort(diff_pearson, decreasing = T)
  diff_pearson_ind <- which(diff_pearson == diff_pearson_vec[i],
                         arr.ind = T)

  diff_idx1 <- diff_pearson_ind[1]; diff_idx2 <- diff_pearson_ind[2]

  par(mfrow = c(2,4))
  plot(sub_full[, diff_idx1], sub_full[, diff_idx2], asp = T, pch = 16,
       col = cluster_labels,
       xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
       ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"),  
       main = paste0("Correlation of ",
                     round(cor(sub_full[, diff_idx1],
                               sub_full[, diff_idx2],
                               method = "pearson"), 3),
                     "\nDifference of ",
                     round(diff_pearson_vec[i], 3)),
       xlim = c(-1, 3), ylim = c(-2, 5))
  for (j in 1:length(cell_labels)){
    sub_dat <- subset_data[[j]]
    plot(sub_dat[,diff_idx1], sub_dat[,diff_idx2], asp = T,
         pch = 16, col = cell_labels[j],
         xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
         ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"), 
         main = paste0("Correlation of ",
                       round(pearson_array[diff_idx1, diff_idx2, j], 3),
                       "\nCell_type: ", cell_labels[j]),
         xlim = c(-1, 3), ylim = c(-2, 5))
  }
}

Heatmap of Pearson coefficient by cell type

library(gridExtra)
marrangeGrob(pearson_heatmap, nrow = 3, ncol = 3)

2. Spearman Correlation (Spearman’s rho)

spearman_median <- c()
spearman_array <- array(NA, dim = c(col, col, cell_num))

spearman_max <- c()
spearman_max_pair1 <- c()
spearman_max_pair2 <- c()

spearman_min <- c()
spearman_min_pair1 <- c()
spearman_min_pair2 <- c()

spearman_heatmap <- list()

for (i in 1:length(cell_labels)){
  subset_spearman <- cor(subset_data[[i]], method = "spearman")
  
  spearman_median <- c(spearman_median, median(subset_spearman))
  
  subset_spearman[upper.tri(subset_spearman, diag = T)] <- NA
  spearman_array[ , , i] <- subset_spearman
  
  spearman_vec <- sort(abs(subset_spearman), decreasing = T)
  
  max_idx <- which(abs(subset_spearman) == spearman_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  spearman_max_pair1 <- c(spearman_max_pair1, idx1)
  spearman_max_pair2 <- c(spearman_max_pair2, idx2)
  
  spearman_max <- c(spearman_max, subset_spearman[idx1, idx2])
  
  min_idx <- which(abs(subset_spearman) == rev(spearman_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  spearman_min_pair1 <- c(spearman_min_pair1, idx1)
  spearman_min_pair2 <- c(spearman_min_pair2, idx2)
  
  spearman_min <- c(spearman_min, subset_spearman[idx1, idx2])
  
  subset_spearman_heatmap <- cor(subset_heatmap[[i]], method = "spearman")
  subset_spearman_heatmap[upper.tri(subset_spearman_heatmap, diag = T)] <- NA
  
  # ggplot to draw a heatmap with the viridis palette
  spearman_heatmap[[i]] <- store_heatmap(subset_spearman_heatmap, "Spearman",
                                        cell_labels[i])
}

The pairs with highest spearman’s rho by cell type. (Close to perfect monotonically dependence)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- spearman_max_pair1[i]; idx2 <- spearman_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
       col = cell_labels[i], pch = 16,
       xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
       main = paste0("Correlation of ", round(spearman_max[i], 3),
                     "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest spearman’s rho by cell type. (Close to imperfect monotonically dependence)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- spearman_min_pair1[i]; idx2 <- spearman_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
       pch = 16, col = cell_labels[i],
       xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
       main = paste0("Correlation of ", round(spearman_min[i], 3),
                     "\nCell_type: ", cell_labels[i]))
}

Spearman correlation of the gene pair which has the the biggest difference by cell type

diff_spearman <- apply(spearman_array, MARGIN = c(1,2),
                      FUN = function(x) {diff(range(x))})
diff_spearman_vec <- sort(diff_spearman, decreasing = T)

for (i in 1:4){
  diff_spearman_ind <- which(diff_spearman == diff_spearman_vec[i],
                             arr.ind = T)
  diff_idx1 <- diff_spearman_ind[1]; diff_idx2 <- diff_spearman_ind[2]

  par(mfrow = c(2,4))
  plot(sub_full[, diff_idx1], sub_full[, diff_idx2], asp = T, pch = 16,
       col = cluster_labels,
       xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
       ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"),  
       main = paste0("Correlation of ",
                     round(cor(sub_full[, diff_idx1],
                               sub_full[, diff_idx2],
                               method = "spearman"), 3),
                     "\nDifference of ",
                     round(diff_spearman_vec[i], 3)),
       xlim = c(-1.5, 3), ylim = c(-1, 3))
  for (j in 1:length(cell_labels)){
    sub_dat <- subset_data[[j]]
    plot(sub_dat[,diff_idx1], sub_dat[,diff_idx2], asp = T,
         pch = 16, col = cell_labels[j],
         xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
         ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"), 
         main = paste0("Correlation of ",
                       round(spearman_array[diff_idx1, diff_idx2, j], 3),
                       "\nCell_type: ", cell_labels[j]),
         xlim = c(-1.5, 3), ylim = c(-1, 3))
  }
}

Heatmap of Spearman’s rho by cell type

marrangeGrob(spearman_heatmap, nrow = 3, ncol = 3)

3. Kendall’s tau

library(pcaPP)

kendall_median <- c()
kendall_array <- array(NA, dim = c(col, col, cell_num))

kendall_max <- c()
kendall_max_pair1 <- c()
kendall_max_pair2 <- c()

kendall_min <- c()
kendall_min_pair1 <- c()
kendall_min_pair2 <- c()

kendall_heatmap <- list()

for (i in 1:length(cell_labels)){
  subset_kendall <- cor.fk(subset_data[[i]])
  
  kendall_median <- c(kendall_median, median(subset_kendall))
  
  subset_kendall[upper.tri(subset_kendall, diag = T)] <- NA
  kendall_array[, , i] <- subset_kendall
  
  kendall_vec <- sort(abs(subset_kendall), decreasing = T)
  
  max_idx <- which(abs(subset_kendall) == kendall_vec[i], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  kendall_max_pair1 <- c(kendall_max_pair1, idx1)
  kendall_max_pair2 <- c(kendall_max_pair2, idx2)
  
  kendall_max <- c(kendall_max, subset_kendall[idx1, idx2])
  
  min_idx <- which(abs(subset_kendall) == rev(kendall_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  kendall_min_pair1 <- c(kendall_min_pair1, idx1)
  kendall_min_pair2 <- c(kendall_min_pair2, idx2)
  
  kendall_min <- c(kendall_min, subset_kendall[idx1, idx2])
  
  subset_kendall_heatmap <- cor.fk(subset_heatmap[[i]])
  subset_kendall_heatmap[upper.tri(subset_kendall_heatmap, diag = T)] <- NA
  
  # ggplot to draw a heatmap with the viridis palette
  kendall_heatmap[[i]] <- store_heatmap(subset_kendall_heatmap, "Kendall",
                                        cell_labels[i])
}

The pairs with highest kendall’s tau by cell type. (Close to perfect monotonically dependence)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- kendall_max_pair1[i]; idx2 <- kendall_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
       pch = 16, col = cell_labels[i],
       xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
       main = paste0("Correlation of ", round(kendall_max[i], 3),
                     "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest kendall’s tau by cell type. (Close to imperfect monotonically dependence)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- kendall_min_pair1[i]; idx2 <- kendall_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
       pch = 16, col = cell_labels[i],
       xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
       main = paste0("Correlation of ", round(kendall_min[i], 3),
                     "\nCell_type: ", cell_labels[i]))
}

Kendall correlation of the gene pair which has the the biggest difference by cell type

diff_kendall <- apply(kendall_array, MARGIN = c(1,2),
                      FUN = function(x) {diff(range(x))})

diff_kendall_vec <- sort(diff_kendall, decreasing = T)

for (i in 1:4){
  diff_kendall_ind <- which(diff_kendall == diff_kendall_vec[i],
                            arr.ind = T)
  
  diff_idx1 <- diff_kendall_ind[1]; diff_idx2 <- diff_kendall_ind[2]

  par(mfrow = c(2,4))
  plot(sub_full[, diff_idx1], sub_full[, diff_idx2], asp = T, pch = 16,
       col = cluster_labels,
       xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
       ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"),  
       main = paste0("Correlation of ",
                     round(cor.fk(sub_full[, diff_idx1],
                                  sub_full[, diff_idx2]), 3),
                     "\nDifference of ",
                     round(diff_kendall_vec[i], 3)),
       xlim = c(-1, 3), ylim = c(-2, 4))
  for (j in 1:length(cell_labels)){
    sub_dat <- subset_data[[j]]
    plot(sub_dat[,diff_idx1], sub_dat[,diff_idx2], asp = T,
         pch = 16, col = cell_labels[j],
         xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
         ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"), 
         main = paste0("Correlation of ",
                       round(kendall_array[diff_idx1, diff_idx2, j], 3),
                       "\nCell_type: ", cell_labels[j]),
         xlim = c(-1, 3), ylim = c(-2, 4))
  }
}

Heatmap of Kendall’s tau by cell type

marrangeGrob(kendall_heatmap, nrow = 3, ncol = 3)

4. Distance Correlation

library(energy)

dist_median <- c()
dist_array <- array(NA, dim = c(col, col, cell_num))

dist_max <- c()
dist_max_pair1 <- c()
dist_max_pair2 <- c()

dist_min <- c()
dist_min_pair1 <- c()
dist_min_pair2 <- c()

dist_heatmap <- list()

for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  subset_dist <- matrix(nrow = ncol(sub_dat),
                        ncol = ncol(sub_dat))

  for (j in 2:ncol(sub_dat)){
    for (k in 1:(j-1)){
      subset_dist[j,k] <- dcor2d(as.numeric(sub_dat[, j]),
                                 as.numeric(sub_dat[, k]))
    }
  }
  dist_array[ , , i] <- subset_dist
  
  dist_median <- c(dist_median, median(subset_dist, na.rm = T))

  dist_vec <- sort(abs(subset_dist), decreasing = T)
  
  max_idx <- which(abs(subset_dist) == dist_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  dist_max_pair1 <- c(dist_max_pair1, idx1)
  dist_max_pair2 <- c(dist_max_pair2, idx2)
  
  dist_max <- c(dist_max, subset_dist[idx1, idx2])
  
  min_idx <- which(abs(subset_dist) == rev(dist_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  dist_min_pair1 <- c(dist_min_pair1, idx1)
  dist_min_pair2 <- c(dist_min_pair2, idx2)
  
  dist_min <- c(dist_min, subset_dist[idx1, idx2])
  
  sub_heat <- subset_heatmap[[i]]
  subset_dist_heatmap <- matrix(nrow = ncol(sub_heat),
                                ncol = ncol(sub_heat))

  for (j in 2:ncol(sub_heat)){
    for (k in 1:(j-1)){
      subset_dist_heatmap[j,k] <- dcor(as.numeric(sub_heat[, j]),
                                       as.numeric(sub_heat[, k]))
    }
  }
  
  # ggplot to draw a heatmap with the viridis palette
  dist_heatmap[[i]] <- store_heatmap(subset_dist_heatmap, "Dist.Cor",
                                        cell_labels[i])
}

The pairs with highest distance correlation by cell type. (Close to perfect linear dependence)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- dist_max_pair1[i]; idx2 <- dist_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
       pch = 16, col = cell_labels[i],
       xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
       main = paste0("Correlation of ", round(dist_max[i], 3),
                     "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest distance correlation by cell type. (Close to imperfect linear dependence)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- dist_min_pair1[i]; idx2 <- dist_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
       pch = 16, col = cell_labels[i],
       xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
       main = paste0("Correlation of ", round(dist_min[i], 3),
                     "\nCell_type: ", cell_labels[i]))
}

Distance correlation of the gene pair which has the the biggest difference by cell type

library(energy)

diff_dist <- apply(dist_array, MARGIN = c(1,2),
                      FUN = function(x) {diff(range(x))})

diff_dist_ind <- which(diff_dist == max(diff_dist, na.rm = T),
                         arr.ind = T)

diff_idx1 <- diff_dist_ind[1]; diff_idx2 <- diff_dist_ind[2]

for (i in 1:4){
  par(mfrow = c(2,4))
  plot(sub_full[, diff_idx1], sub_full[, diff_idx2], asp = T, pch = 16,
       col = cluster_labels,
       xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
       ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"),  
       main = paste0("Correlation of ",
                     round(dcor(as.numeric(sub_full[, diff_idx1]),
                                as.numeric(sub_full[, diff_idx2])), 3)))
  for (j in 1:length(cell_labels)){
    sub_dat <- subset_data[[j]]
    plot(sub_dat[,diff_idx1], sub_dat[,diff_idx2], asp = T,
         pch = 16, col = cell_labels[j],
         xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
         ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"), 
         main = paste0("Correlation of ",
                       round(dist_array[diff_idx1, diff_idx2, j], 3),
                       "\nCell_type: ", cell_labels[j]),
         xlim = c(-1, 3), ylim = c(-2, 5))
  }
}

Heatmap of distance correlation by cell type

marrangeGrob(dist_heatmap, nrow = 3, ncol = 3)

5. Hoeffding’s D measure

library(Hmisc)

hoeff_median <- c()
hoeff_array <- array(NA, dim = c(col, col, cell_num))

hoeff_max <- c()
hoeff_max_pair1 <- c()
hoeff_max_pair2 <- c()

hoeff_min <- c()
hoeff_min_pair1 <- c()
hoeff_min_pair2 <- c()

hoeff_heatmap <- list()

for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  
  subset_hoeff <- hoeffd(x = as.matrix(sub_dat))$D
  
  hoeff_median <- c(hoeff_median, median(subset_hoeff, na.rm = T))
  
  subset_hoeff[upper.tri(subset_hoeff, diag = T)] <- NA
  hoeff_array[, , i] <- subset_hoeff
  
  hoeff_vec <- sort(abs(subset_hoeff), decreasing = T)
  
  max_idx <- which(abs(subset_hoeff) == hoeff_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  hoeff_max_pair1 <- c(hoeff_max_pair1, idx1)
  hoeff_max_pair2 <- c(hoeff_max_pair2, idx2)
  
  hoeff_max <- c(hoeff_max, subset_hoeff[idx1, idx2])
  
  min_idx <- which(abs(subset_hoeff) == rev(hoeff_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  hoeff_min_pair1 <- c(hoeff_min_pair1, idx1)
  hoeff_min_pair2 <- c(hoeff_min_pair2, idx2)
  
  hoeff_min <- c(hoeff_min, subset_hoeff[idx1, idx2])
  
  subset_hoeff_heatmap <- hoeffd(x = as.matrix(subset_heatmap[[i]]))$D
  
  subset_hoeff_heatmap[upper.tri(subset_hoeff_heatmap, diag = T)] <- NA
  
  # ggplot to draw a heatmap with the viridis palette
  hoeff_heatmap[[i]] <- store_heatmap(subset_hoeff_heatmap, "Hoeffding's D",
                                        cell_labels[i])
}

The pairs with highest hoeffiding’s D by cell type. (Highly correlated)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- hoeff_max_pair1[i]; idx2 <- hoeff_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
       pch = 16, col = cell_labels[i],
       xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
       main = paste0("Correlation of ", round(hoeff_max[i], 3),
                     "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest hoeffiding’s D by cell type. (Seemingly independent)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- hoeff_min_pair1[i]; idx2 <- hoeff_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
       pch = 16, col = cell_labels[i],
       xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
       main = paste0("Correlation of ", round(hoeff_min[i], 3),
                     "\nCell_type: ", cell_labels[i]))
}

Hoeffding’s D of the gene pair which has the the biggest difference by cell type

library(Hmisc)

diff_hoeff <- apply(hoeff_array, MARGIN = c(1,2),
                      FUN = function(x) {diff(range(x))})

diff_hoeff_ind <- which(diff_hoeff == max(diff_hoeff, na.rm = T),
                         arr.ind = T)

diff_idx1 <- diff_hoeff_ind[1]; diff_idx2 <- diff_hoeff_ind[2]

for (i in 1:4){
  par(mfrow = c(2,4))
  plot(sub_full[, diff_idx1], sub_full[, diff_idx2], asp = T, pch = 16,
       col = cluster_labels,
       xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
       ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"),  
       main = paste0("Correlation of ",
                     round(hoeffd(sub_full[, diff_idx1],
                               sub_full[, diff_idx2])$D, 3)))
  for (j in 1:length(cell_labels)){
    sub_dat <- subset_data[[j]]
    plot(sub_dat[,diff_idx1], sub_dat[,diff_idx2], asp = T,
         pch = 16, col = cell_labels[j],
         xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
         ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"), 
         main = paste0("Correlation of ",
                       round(hoeff_array[diff_idx1, diff_idx2, i], 3),
                       "\nCell_type: ", cell_labels[i]),
         xlim = c(-1, 3), ylim = c(-2, 4))
  }
}

Heatmap of Hoeffiding’s D by cell type

marrangeGrob(hoeff_heatmap, nrow = 3, ncol = 3)

6. Mutual information (MI)

library(entropy)

MI_median <- c()
MI_array <- array(NA, dim = c(col, col, cell_num))

MI_max <- c()
MI_max_pair1 <- c()
MI_max_pair2 <- c()

MI_min <- c()
MI_min_pair1 <- c()
MI_min_pair2 <- c()

MI_heatmap <- list()

for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  subset_MI <- matrix(nrow = ncol(sub_dat),
                      ncol = ncol(sub_dat))

  for (j in 2:ncol(sub_dat)){
    for (k in 1:(j-1)){
    y2d <- discretize2d(as.matrix(sub_dat[, j]),
                                   as.matrix(sub_dat[, k]),
                                   numBins1 = 20,
                                   numBins2 = 20)
    subset_MI[j,k] <- as.numeric(mi.empirical(y2d))
    }
  }
  
  MI_array[, , i] <- subset_MI
  
  MI_median <- c(MI_median, median(subset_MI, na.rm = T))
  
  MI_vec <- sort(abs(subset_MI), decreasing = T)
  
  max_idx <- which(abs(subset_MI) == MI_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  MI_max_pair1 <- c(MI_max_pair1, idx1)
  MI_max_pair2 <- c(MI_max_pair2, idx2)
  
  MI_max <- c(MI_max, subset_MI[idx1, idx2])
  
  min_idx <- which(abs(subset_MI) == rev(MI_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  MI_min_pair1 <- c(MI_min_pair1, idx1)
  MI_min_pair2 <- c(MI_min_pair2, idx2)
  
  MI_min <- c(MI_min, subset_MI[idx1, idx2])
  
  sub_heat <- subset_heatmap[[i]]
  subset_MI_heatmap <- matrix(nrow = ncol(sub_heat),
                      ncol = ncol(sub_heat))

  for (j in 2:ncol(sub_heat)){
    for (k in 1:(j-1)){
    y2d <- discretize2d(as.matrix(sub_heat[, j]),
                                   as.matrix(sub_heat[, k]),
                                   numBins1 = 20,
                                   numBins2 = 20)
    subset_MI_heatmap[j,k] <- as.numeric(mi.empirical(y2d))
    }
  }
  
  # ggplot to draw a heatmap with the viridis palette
  MI_heatmap[[i]] <- store_heatmap(subset_MI_heatmap, "MI",
                                        cell_labels[i])
}

The pairs with highest MI by cell type. (Highly correlated)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- MI_max_pair1[i]; idx2 <- MI_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
       main = paste0("Correlation of ", round(MI_max[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest MI by cell type. (Seemingly independent)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- MI_min_pair1[i]; idx2 <- MI_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(MI_min[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

Hoeffding’s D of the gene pair which has the the biggest difference by cell type

library(entropy)

diff_MI <- apply(MI_array, MARGIN = c(1,2),
                      FUN = function(x) {diff(range(x))})

diff_MI_ind <- which(diff_MI == max(diff_MI, na.rm = T),
                         arr.ind = T)

diff_idx1 <- diff_MI_ind[1]; diff_idx2 <- diff_MI_ind[2]

par(mfrow = c(2,4))
plot(sub_full[, diff_idx1], sub_full[, diff_idx2], asp = T, pch = 16,
     col = cluster_labels,
     xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
     ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"),  
     main = paste0("Correlation of ",
                   round(mi.empirical(discretize2d(as.matrix(sub_full[, diff_idx1]),
                                   as.matrix(sub_full[, diff_idx2]),
                                   numBins1 = 20,
                                   numBins2 = 20)), 3)))

for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  plot(sub_dat[,diff_idx1], sub_dat[,diff_idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
      ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"), 
      main = paste0("Correlation of ",
                    round(MI_array[diff_idx1, diff_idx2, i], 3),
                    "\nCell_type: ", cell_labels[i]),
      xlim = c(-1, 3), ylim = c(-2, 6))
}

Heatmap of MI by cell type

marrangeGrob(MI_heatmap, nrow = 3, ncol = 3)

7. Maximum Information Coefficient (MIC)

library(minerva)

MIC_median <- c()
MIC_array <- array(NA, dim = c(col, col, cell_num))

MIC_max <- c()
MIC_max_pair1 <- c()
MIC_max_pair2 <- c()

MIC_min <- c()
MIC_min_pair1 <- c()
MIC_min_pair2 <- c()

MIC_heatmap <- c()

for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  subset_MIC <- mine(sub_dat)$MIC
  
  MIC_median <- c(MIC_median, median(subset_MIC, na.rm = T))
  
  subset_MIC[upper.tri(subset_MIC, diag = T)] <- NA
  MIC_array[, , i] <- subset_MIC
  
  MIC_vec <- sort(abs(subset_MIC), decreasing = T)
  
  max_idx <- which(abs(subset_MIC) == MIC_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  MIC_max_pair1 <- c(MIC_max_pair1, idx1)
  MIC_max_pair2 <- c(MIC_max_pair2, idx2)
  
  MIC_max <- c(MIC_max, subset_MIC[idx1, idx2])
  
  min_idx <- which(abs(subset_MIC) == rev(MIC_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  MIC_min_pair1 <- c(MIC_min_pair1, idx1)
  MIC_min_pair2 <- c(MIC_min_pair2, idx2)
  
  MIC_min <- c(MIC_min, subset_MIC[idx1, idx2])
  
  subset_MIC_heatmap <- mine(subset_heatmap[[i]])$MIC
  subset_MIC_heatmap[upper.tri(subset_MIC_heatmap, diag = T)] <- NA
  
  # ggplot to draw a heatmap with the viridis palette
  MIC_heatmap[[i]] <- store_heatmap(subset_MIC_heatmap, "MIC",
                                        cell_labels[i])
}

The pairs with highest MIC by cell type. (Highly correlated)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- MIC_max_pair1[i]; idx2 <- MIC_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(MIC_max[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest MIC by cell type. (Seemingly independent)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- MIC_min_pair1[i]; idx2 <- MIC_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(MIC_min[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

MIC of the gene pair which has the the biggest difference by cell type

library(minerva)

diff_MIC <- apply(MIC_array, MARGIN = c(1,2),
                      FUN = function(x) {diff(range(x))})

diff_MIC_ind <- which(diff_MIC == max(diff_MIC, na.rm = T),
                         arr.ind = T)

diff_idx1 <- diff_MIC_ind[1]; diff_idx2 <- diff_MIC_ind[2]

par(mfrow = c(2,4))
plot(sub_full[, diff_idx1], sub_full[, diff_idx2], asp = T, pch = 16,
     col = cluster_labels,
     xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
     ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"),  
     main = paste0("Correlation of ",
                   round(mine(sub_full[, diff_idx1],
                             sub_full[, diff_idx2])$MIC, 3)))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  plot(sub_dat[,diff_idx1], sub_dat[,diff_idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
      ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"), 
      main = paste0("Correlation of ",
                    round(MIC_array[diff_idx1, diff_idx2, i], 3),
                    "\nCell_type: ", cell_labels[i]),
      xlim = c(-1, 4), ylim = c(-3, 4))
}

Heatmap of MIC by cell type

marrangeGrob(MIC_heatmap, nrow = 3, ncol = 3)

8. Chatterjee’s method (XI)

library(XICOR)

XI_median <- c()
XI_array <- array(NA, dim = c(col, col, cell_num))

XI_max <- c()
XI_max_pair1 <- c()
XI_max_pair2 <- c()

XI_min <- c()
XI_min_pair1 <- c()
XI_min_pair2 <- c()

XI_heatmap <- list()

for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  subset_XI <- matrix(nrow = ncol(sub_dat),
                      ncol = ncol(sub_dat))

  for (j in 2:ncol(sub_dat)){
    for (k in 1:(j-1)){
      subset_XI[j,k] <- calculateXI(as.numeric(sub_dat[, j]),
                                    as.numeric(sub_dat[, k]))
    }
  }
  
  XI_array[, , i] <- subset_XI
  
  XI_median <- c(XI_median, median(subset_XI, na.rm = T))
  
  XI_vec <- sort(abs(subset_XI), decreasing = T)
  
  max_idx <- which(abs(subset_XI) == XI_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  XI_max_pair1 <- c(XI_max_pair1, idx1)
  XI_max_pair2 <- c(XI_max_pair2, idx2)
  
  XI_max <- c(XI_max, subset_XI[idx1, idx2])
  
  min_idx <- which(abs(subset_XI) == rev(XI_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  XI_min_pair1 <- c(XI_min_pair1, idx1)
  XI_min_pair2 <- c(XI_min_pair2, idx2)
  
  XI_min <- c(XI_min, subset_XI[idx1, idx2])
  
  sub_heat <- subset_heatmap[[i]]
  
  subset_XI_heatmap <- matrix(nrow = ncol(sub_heat),
                      ncol = ncol(sub_heat))

  for (j in 2:ncol(sub_heat)){
    for (k in 1:(j-1)){
      subset_XI_heatmap[j,k] <- calculateXI(as.numeric(sub_heat[, j]),
                                    as.numeric(sub_heat[, k]))
    }
  }
  
  # ggplot to draw a heatmap with the viridis palette
  XI_heatmap[[i]] <- store_heatmap(subset_XI_heatmap, "XI",
                                        cell_labels[i])
}

The pairs with highest XI by cell type. (Highly correlated)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- XI_max_pair1[i]; idx2 <- XI_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(XI_max[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest XI by cell type. (Seemingly independent)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- XI_min_pair1[i]; idx2 <- XI_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(XI_min[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

XI of the gene pair which has the the biggest difference by cell type

library(XICOR)

diff_XI <- apply(XI_array, MARGIN = c(1,2),
                      FUN = function(x) {diff(range(x))})

diff_XI_ind <- which(diff_XI == max(diff_XI, na.rm = T),
                         arr.ind = T)

diff_idx1 <- diff_XI_ind[1]; diff_idx2 <- diff_XI_ind[2]

par(mfrow = c(2,4))
plot(sub_full[, diff_idx1], sub_full[, diff_idx2], asp = T, pch = 16,
     col = cluster_labels,
     xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
     ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"),  
     main = paste0("Correlation of ",
                   round(calculateXI(as.numeric(sub_full[, diff_idx1]),
                                    as.numeric(sub_full[, diff_idx2])), 3)))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  plot(sub_dat[,diff_idx1], sub_dat[,diff_idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
      ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"), 
      main = paste0("Correlation of ",
                    round(XI_array[diff_idx1, diff_idx2, i], 3),
                    "\nCell_type: ", cell_labels[i]),
      xlim = c(-1, 3), ylim = c(-2, 4))
}

Heatmap of XI by cell type

marrangeGrob(XI_heatmap, nrow = 3, ncol = 3)

9. Hilbert Schmidt Independence Criterion (HSIC)

library(dHSIC)

HSIC_median <- c()
HSIC_array <- array(NA, dim = c(col, col, cell_num))

HSIC_max <- c()
HSIC_max_pair1 <- c()
HSIC_max_pair2 <- c()

HSIC_min <- c()
HSIC_min_pair1 <- c()
HSIC_min_pair2 <- c()

HSIC_heatmap <- list()

for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  subset_HSIC <- matrix(nrow = ncol(sub_dat),
                      ncol = ncol(sub_dat))

  for (j in 2:ncol(sub_dat)){
    for (k in 1:(j-1)){
      subset_HSIC[j,k] <- dhsic(as.numeric(sub_dat[, j]),
                                as.numeric(sub_dat[, k]))$dHSIC
    }
  }
  
  HSIC_array[, , i] <- subset_HSIC
  
  HSIC_median <- c(HSIC_median, median(subset_HSIC, na.rm = T))
  
  HSIC_vec <- sort(abs(subset_HSIC), decreasing = T)
  
  max_idx <- which(abs(subset_HSIC) == HSIC_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  HSIC_max_pair1 <- c(HSIC_max_pair1, idx1)
  HSIC_max_pair2 <- c(HSIC_max_pair2, idx2)
  
  HSIC_max <- c(HSIC_max, subset_HSIC[idx1, idx2])
  
  min_idx <- which(abs(subset_HSIC) == rev(HSIC_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  HSIC_min_pair1 <- c(HSIC_min_pair1, idx1)
  HSIC_min_pair2 <- c(HSIC_min_pair2, idx2)
  
  HSIC_min <- c(HSIC_min, subset_HSIC[idx1, idx2])
  
  sub_heat <- subset_heatmap[[i]]
  
  subset_HSIC_heatmap <- matrix(nrow = ncol(sub_heat),
                      ncol = ncol(sub_heat))

  for (j in 2:ncol(sub_heat)){
    for (k in 1:(j-1)){
      subset_HSIC_heatmap[j,k] <- dhsic(as.numeric(sub_heat[, j]),
                                        as.numeric(sub_heat[, k]))$dHSIC
    }
  }

  # ggplot to draw a heatmap with the viridis palette
  HSIC_heatmap[[i]] <- store_heatmap(subset_HSIC_heatmap, "HSIC",
                                        cell_labels[i])
}

The pairs with highest HSIC by cell type. (Highly correlated)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- HSIC_max_pair1[i]; idx2 <- HSIC_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(HSIC_max[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest HSIC by cell type. (Seemingly independent)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- HSIC_min_pair1[i]; idx2 <- HSIC_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(HSIC_min[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

HSIC of the gene pair which has the the biggest difference by cell type

library(dHSIC)

diff_HSIC <- apply(HSIC_array, MARGIN = c(1,2),
                      FUN = function(x) {diff(range(x))})

diff_HSIC_ind <- which(diff_HSIC == max(diff_HSIC, na.rm = T),
                         arr.ind = T)

diff_idx1 <- diff_HSIC_ind[1]; diff_idx2 <- diff_HSIC_ind[2]

par(mfrow = c(2,4))
plot(sub_full[, diff_idx1], sub_full[, diff_idx2], asp = T, pch = 16,
     col = cluster_labels,
     xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
     ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"),  
     main = paste0("Correlation of ",
                   round(dhsic(as.numeric(sub_full[, diff_idx1]),
                                    as.numeric(sub_full[, diff_idx2]))$dHSIC, 3)))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  plot(sub_dat[,diff_idx1], sub_dat[,diff_idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
      ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"), 
      main = paste0("Correlation of ",
                    round(HSIC_array[diff_idx1, diff_idx2, i], 3),
                    "\nCell_type: ", cell_labels[i]),
      xlim = c(-1, 4), ylim = c(-1, 4))
}

Heatmap of HSIC by cell type

marrangeGrob(HSIC_heatmap, nrow = 3, ncol = 3)

10. Blomqvist’s Beta

library(VineCopula)
library(reshape2)

Blomqvist_median <- c()
Blomqvist_array <- array(NA, dim = c(col, col, cell_num))

Blomqvist_max <- c()
Blomqvist_max_pair1 <- c()
Blomqvist_max_pair2 <- c()

Blomqvist_min <- c()
Blomqvist_min_pair1 <- c()
Blomqvist_min_pair2 <- c()

Blomqvist_heatmap <- list()

for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  
  subset_Blomqvist <- BetaMatrix(sub_dat)
  subset_Blomqvist[upper.tri(subset_Blomqvist, diag = T)]
  
  Blomqvist_array[, , i] <- subset_Blomqvist
  
  Blomqvist_median <- c(Blomqvist_median, median(subset_Blomqvist, na.rm = T))
  
  Blomqvist_vec <- sort(abs(subset_Blomqvist), decreasing = T)
  
  max_idx <- which(abs(subset_Blomqvist) == Blomqvist_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  Blomqvist_max_pair1 <- c(Blomqvist_max_pair1, idx1)
  Blomqvist_max_pair2 <- c(Blomqvist_max_pair2, idx2)
  
  Blomqvist_max <- c(Blomqvist_max, subset_Blomqvist[idx1, idx2])
  
  min_idx <- which(abs(subset_Blomqvist) == rev(Blomqvist_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  Blomqvist_min_pair1 <- c(Blomqvist_min_pair1, idx1)
  Blomqvist_min_pair2 <- c(Blomqvist_min_pair2, idx2)
  
  Blomqvist_min <- c(Blomqvist_min, subset_Blomqvist[idx1, idx2])
  
  sub_heat <- subset_heatmap[[i]]
  
  subset_Blomqvist_heatmap <- BetaMatrix(sub_heat)
  subset_Blomqvist_heatmap[upper.tri(subset_Blomqvist_heatmap, diag = T)] <- NA
  
  # ggplot to draw a heatmap with the viridis palette
  Blomqvist_heatmap[[i]] <- store_heatmap(subset_Blomqvist_heatmap, "Beta",
                                          cell_labels[i])
}

The pairs with highest Blomqvist’s Beta by cell type. (Highly correlated)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- Blomqvist_max_pair1[i]; idx2 <- Blomqvist_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(Blomqvist_max[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest Blomqvist’s Beta by cell type. (Seemingly independent)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- Blomqvist_min_pair1[i]; idx2 <- Blomqvist_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(Blomqvist_min[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

Blomqvist’s Beta of the gene pair which has the the biggest difference by cell type

library(VineCopula)

diff_Blomqvist <- apply(Blomqvist_array, MARGIN = c(1,2),
                      FUN = function(x) {diff(range(x))})

diff_Blomqvist_ind <- which(diff_Blomqvist == max(diff_Blomqvist, na.rm = T),
                         arr.ind = T)

diff_idx1 <- diff_Blomqvist_ind[1]; diff_idx2 <- diff_Blomqvist_ind[2]

par(mfrow = c(2,4))
plot(sub_full[, diff_idx1], sub_full[, diff_idx2], asp = T, pch = 16,
     col = cluster_labels,
     xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
     ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"),  
     main = paste0("Correlation of ",
                   round(BetaMatrix(cbind(sub_full[, diff_idx1],
                                          sub_full[, diff_idx2]))[1,1], 3)))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  plot(sub_dat[,diff_idx1], sub_dat[,diff_idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[diff_idx1], ", (", diff_idx1, ")"),
      ylab = paste0(colnames(sub_dat)[diff_idx2], ", (", diff_idx2, ")"), 
      main = paste0("Correlation of ",
                    round(Blomqvist_array[diff_idx1, diff_idx2, i], 3),
                    "\nCell_type: ", cell_labels[i]),
      xlim = c(-1, 2.5), ylim = c(-1, 3))
}

Heatmap of Blomqvist’s Beta by cell type

marrangeGrob(Blomqvist_heatmap, nrow = 3, ncol = 3)

Median of correlation values by cell type and different measures.

median_df <- rbind(pearson_median, spearman_median,
                   kendall_median, dist_median,
                   hoeff_median, MI_median,
                   MIC_median, XI_median,
                   HSIC_median, Blomqvist_median)
rownames(median_df) <- c("Pearson", "Spearman", "Kendall",
                         "Dist_Cor", "Hoeff_Dist", "Mutual_Info",
                         "MIC", "XI", "HSIC", "Beta")
colnames(median_df) <- cell_labels

knitr::kable(median_df)
interneurons pyramidal SS pyramidal CA1 oligodendrocytes microglia endothelial-mural astrocytes_ependymal
Pearson 0.0759159 0.0907989 0.1194560 0.0833058 0.0969368 0.0869395 0.1013804
Spearman 0.0967453 0.1227442 0.1694427 0.0888736 0.1116679 0.0973767 0.1098153
Kendall 0.0655530 0.0830594 0.1140691 0.0603234 0.0751105 0.0660847 0.0751121
Dist_Cor 0.0822903 0.0758886 0.0736518 0.0703456 0.0848335 0.0762448 0.0916990
Hoeff_Dist 0.0231601 0.0228001 0.0231996 0.0181586 0.0166263 0.0193998 0.0210273
Mutual_Info 0.4697460 0.3582293 0.1941926 0.2178592 0.8858517 0.5109431 0.5437619
MIC 0.2593838 0.2387378 0.1942228 0.2007301 0.2885585 0.2598145 0.2663690
XI 0.0749831 0.0714698 0.0675475 0.0646937 0.0631053 0.0655693 0.0816143
HSIC 0.0061597 0.0053867 0.0046994 0.0042700 0.0066889 0.0053644 0.0063167
Beta 0.6000000 0.6190476 0.7529286 0.8439024 0.8571429 0.8553191 0.8392857